import time
import random
import requests
from lxml import etree
import redis
import hashlib

# Redis client built with the library's default connection settings; it backs
# the set of already-crawled link fingerprints used further down.
r = redis.Redis()

# Browser-like User-Agent sent with every HTTP request. The value is one
# string; it is split across adjacent literals only for readability.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51"
    ),
}

# Seconds to wait for an HTTP response before giving up; without a timeout a
# stalled connection would block the crawl indefinitely.
REQUEST_TIMEOUT = 10

# Walk the listing pages (currently only page 1), print title / link / total
# price for every listing, and fetch each detail page at most once, using a
# Redis set of link fingerprints for de-duplication.
for page in range(1, 2):
    url = f"https://beijing.anjuke.com/sale/p{page}/"
    try:
        list_resp = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
        list_resp.raise_for_status()
        html = list_resp.text
    except requests.RequestException as exc:
        print(f"列表页请求失败: {url} ({exc})")
        html = None
    # Random pause after every list-page request, successful or not.
    time.sleep(random.randint(1, 3))
    if not html:
        continue

    eobj = etree.HTML(html)
    if eobj is None:
        # Defensive: the parser may yield no tree for an unusable document.
        continue
    div_list = eobj.xpath("//div[@class='property']")
    print(len(div_list), div_list)
    for i in div_list:
        titles = i.xpath(".//a/div[@class='property-content']/div/div/h3/text()")
        hrefs = i.xpath(".//a/@href")
        # Relative to the current listing node. The previous expression began
        # with "//", which searches the whole document and therefore returned
        # the first listing's price for every item. This path is the tail of
        # that expression below the listing <div>.
        # NOTE(review): positional path taken from the old XPath - confirm
        # against the live page markup.
        totals = i.xpath("./a/div[2]/div[2]/p[1]/span[1]/text()")
        if not titles or not hrefs:
            # Cards without a title or link cannot be processed; skip them
            # instead of crashing with IndexError.
            print("字段缺失，跳过")
            continue
        title = titles[0]
        href = hrefs[0]
        total = totals[0] if totals else ""
        print(title, href, total)

        # Hash the link with MD5 so only a short fixed-length fingerprint is
        # stored in Redis instead of the full URL.
        finger = hashlib.md5(href.encode()).hexdigest()

        # SADD returns 1 when the fingerprint was newly added (not crawled
        # yet) and 0 when it was already in the set.
        # The key name is kept byte-for-byte so existing de-dup data stays
        # valid.
        if r.sadd("lianjia: spider", finger) == 1:
            try:
                detail_html = requests.get(
                    url=href, headers=headers, timeout=REQUEST_TIMEOUT
                )
                detail_html.raise_for_status()
            except requests.RequestException as exc:
                # Roll back the fingerprint so a later run retries this link
                # rather than treating a failed fetch as done.
                r.srem("lianjia: spider", finger)
                print(f"抓取失败: {href} ({exc})")
            else:
                print("抓取成功")
        else:
            print("已抓取")
